import json
import time
from urllib.parse import urljoin

import requests
from lxml import etree

# Site root that the hrefs scraped from listing pages are combined with.
domain = 'https://xiaohua.zol.com.cn/'
# URL of the first "newest" listing page.
page_url = 'https://xiaohua.zol.com.cn/new/'

# HTTP headers sent with every request (a desktop Chrome User-Agent string).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/121.0.0.0 Safari/537.36'
    ),
}

# Scraped records accumulate here as {'title': ..., 'content': ...} dicts.
finish_data = []


def parse_page(page_url):
    """Scrape one listing page and process every detail page it links to.

    Downloads ``page_url``, extracts the href of each "all-read" link inside
    the ``article-list`` element, resolves it against ``domain`` and passes
    the resulting URL to ``parse_detial``. A one-second pause follows each
    detail page.

    A detail page that fails (request error, or ``IndexError`` because no
    title was found) is reported and skipped so that one bad page does not
    abort the rest of the listing.

    Args:
        page_url: Absolute URL of the listing page to scrape.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched (connection error, timeout, or HTTP error status).
    """
    # Without a timeout requests may block indefinitely on a stalled server.
    res = requests.get(page_url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a listing.
    res.raise_for_status()
    parser = etree.HTML(res.text)
    detial_hrefs = parser.xpath(
        "//ul[@class='article-list']/li//a[@class='all-read']/@href")
    for href in detial_hrefs:
        # urljoin handles root-relative and absolute hrefs alike; plain
        # concatenation produces a double slash when the href starts with
        # '/' because `domain` already ends with one.
        detial_url = urljoin(domain, href)
        try:
            parse_detial(detial_url)
        except (requests.RequestException, IndexError) as exc:
            print(f'数据解析失败{detial_url}: {exc}')
        else:
            print('数据解析成功' + detial_url)
        # Pause between requests regardless of the outcome.
        time.sleep(1)


def parse_detial(detial_url):
    """Download one detail page and append its title and text to ``finish_data``.

    The title is the first text node of ``h1.article-title``; the content is
    the concatenation of all ``div.article-text/p`` text nodes with leading
    and trailing whitespace stripped.

    Args:
        detial_url: Absolute URL of the detail page.

    Raises:
        requests.RequestException: If the page cannot be fetched (connection
            error, timeout, or HTTP error status).
        IndexError: If the page contains no title text.
    """
    # Without a timeout requests may block indefinitely on a stalled server.
    res = requests.get(detial_url, headers=headers, timeout=10)
    # Do not try to extract a joke from a 4xx/5xx error page.
    res.raise_for_status()
    detial_parser = etree.HTML(res.text)

    titles = detial_parser.xpath("//h1[@class='article-title']/text()")
    if not titles:
        # Same exception type a bare [0] would raise, but naming the page.
        raise IndexError(f'no article title found at {detial_url}')

    paragraphs = detial_parser.xpath("//div[@class='article-text']/p/text()")
    finish_data.append({
        'title': titles[0],
        'content': ''.join(paragraphs).strip(),
    })


def main(max_page=2, output_path='爬取笑话大全.json'):
    """Scrape listing pages 1..max_page and dump the results as JSON.

    Each listing page is handed to ``parse_page`` with a one-second pause
    between pages. A listing page whose request fails is reported and
    skipped, so records already collected are still written out.

    Args:
        max_page: Number of the last listing page to scrape (inclusive).
            Defaults to 2.
        output_path: Path of the JSON file to write. Defaults to the
            filename this script has always used.
    """
    for page in range(1, max_page + 1):
        print(page)
        # The first listing page has no page number in its URL.
        if page == 1:
            page_url = 'https://xiaohua.zol.com.cn/new/'
        else:
            page_url = f"https://xiaohua.zol.com.cn/new/{page}.html"
        try:
            parse_page(page_url)
        except requests.RequestException as exc:
            # Keep going: losing one page should not discard everything.
            print(f'页面请求失败{page_url}: {exc}')
        time.sleep(1)

    # Open the output file for writing as UTF-8.
    with open(output_path, 'w', encoding='utf-8') as fp:
        # ensure_ascii=False stores Chinese text as-is rather than as
        # \uXXXX escape sequences.
        json.dump(finish_data, fp, ensure_ascii=False)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
